cat(sprintf("\nScript 3: Wrangle Bovitz data (%s)\n", f_time_gmt()))

# Load data --------------------------------------------------------------------

# Read the raw Qualtrics export (wide format)
bovitz_w_raw <- fread(here("data", "bovitz", "bovitz_qualtrics_results.csv"))

# Keep a row only when all three conditions hold:
#   - consent_agree is "Yes"
#   - thoughtful is "Yes"
#   - RESPONDENT_ID starts with "i"
# Subsetting rows already returns a new data.table, so bovitz_w_raw itself is
# left untouched. Rows where the combined condition is NA are dropped as well.
bovitz_w <- bovitz_w_raw[
  consent_agree == "Yes" &
    thoughtful == "Yes" &
    str_detect(RESPONDENT_ID, "^i")
]

# De-duplicate repeat takers: when a survey-company ID (RESPONDENT_ID) occurs
# more than once, retain only the submission with the latest EndDate.
#   n_RESPONDENT_ID   = number of rows sharing the ID
#   seq_RESPONDENT_ID = rank of the row within its ID, in EndDate order
# Both helper columns are left on bovitz_w.
bovitz_w[, n_RESPONDENT_ID := .N, by = RESPONDENT_ID]
bovitz_w <- bovitz_w[order(EndDate)]
bovitz_w[, seq_RESPONDENT_ID := seq_len(.N), by = RESPONDENT_ID]

# Single submissions are stacked with the last submission of each repeat
# taker, then the result is put back in EndDate order.
bovitz_w <- rbind(
  bovitz_w[n_RESPONDENT_ID == 1],
  bovitz_w[n_RESPONDENT_ID > 1 & n_RESPONDENT_ID == seq_RESPONDENT_ID]
)[order(EndDate)]



# Merge-in education from Bovitz's separate dataset ----------------------------

## load data: only the respondent id and the education code are read
bovitz_ed <- fread(
  here("data", "bovitz", "bovitz_background_covariates.csv"),
  select = c("id", "education")
)

## assign Bovitz education labels
## The raw code is kept under the name education_bovitz_num; education_bovitz
## is the labelled factor, built by converting the code to text and mapping
## "1" through "9" to the labels below (new label = old code).
setnames(bovitz_ed, old = "education", new = "education_bovitz_num")
bovitz_ed[, education_bovitz := fct_recode(
  as.character(education_bovitz_num),
  "Less than high school graduate" = "1",
  "High school graduate, diploma or the equivalent (for example: GED)" = "2",
  "Some college credit, no degree" = "3",
  "Trade/technical/vocational training" = "4",
  "Associate degree" = "5",
  "Bachelor's degree" = "6",
  "Master's degree" = "7",
  "Professional degree" = "8",
  "Doctorate degree" = "9"
)]

## recode to follow Lucid/Cint's convention
## Step 1: rename the Bovitz labels. "Master's degree" and "Professional
## degree" are collapsed into one level; labels not listed here
## ("Bachelor's degree", "Doctorate degree") pass through unchanged.
bovitz_ed[, education := fct_recode(
  education_bovitz,
  "Some high school or less" = "Less than high school graduate",
  "High school graduate" = "High school graduate, diploma or the equivalent (for example: GED)",
  "Completed some college, but no degree" = "Some college credit, no degree",
  "Other post high school vocational training" = "Trade/technical/vocational training",
  "Associate's degree" = "Associate degree",
  "Master's or professional degree" = "Master's degree",
  "Master's or professional degree" = "Professional degree"
)]

## Step 2: put the levels in the order used below for education_num
## (vocational training is placed before "some college").
bovitz_ed[, education := fct_relevel(
  education,
  "Some high school or less",
  "High school graduate",
  "Other post high school vocational training",
  "Completed some college, but no degree",
  "Associate's degree",
  "Bachelor's degree",
  "Master's or professional degree",
  "Doctorate degree"
)]

## Step 3: numeric version of education (1-8), obtained by relabelling each
## level with its rank and converting the label text to a number.
bovitz_ed[, education_num := as.numeric(as.character(fct_recode(
  education,
  "1" = "Some high school or less",
  "2" = "High school graduate",
  "3" = "Other post high school vocational training",
  "4" = "Completed some college, but no degree",
  "5" = "Associate's degree",
  "6" = "Bachelor's degree",
  "7" = "Master's or professional degree",
  "8" = "Doctorate degree"
)))]

## merge Cint-style education values to main data
## Left join on the respondent id (RESPONDENT_ID in bovitz_w, id in bovitz_ed):
## every row of bovitz_w is kept and its row order is retained; respondents
## with no match in bovitz_ed get NA for education and education_num.
## TRUE/FALSE are spelled out because T and F are ordinary variables that can
## be reassigned.
bovitz_w <- merge(
  bovitz_w,
  bovitz_ed[, .(id, education, education_num)],
  by.x = "RESPONDENT_ID", by.y = "id",
  all.x = TRUE, sort = FALSE
)

## four-category education factor built from education_num:
##   1-2 -> "HS or less", 3-4 -> "Some college",
##   5-6 -> "College",    7-8 -> "Postgraduate"
## Any other value (including NA) yields NA.
bovitz_w[, ed4 := fcase(
  education_num %in% 1:2, "HS or less",
  education_num %in% 3:4, "Some college",
  education_num %in% 5:6, "College",
  education_num %in% 7:8, "Postgraduate"
) |> factor(levels = c("HS or less", "Some college", "College", "Postgraduate"))]

# Handle background variables --------------------------------------------------

# Create numeracy (CRT) dummies and sum
# An item scores 1 when the answer is the expected number optionally followed
# by non-digit characters, and 0 otherwise: "5" for crt1 and crt2, "47" for
# crt3. A missing answer gives an NA dummy.
bovitz_w[, `:=`(
  crt1_dummy = as.integer(str_detect(crt1, "^5[^\\d]*$")),
  crt2_dummy = as.integer(str_detect(crt2, "^5[^\\d]*$")),
  crt3_dummy = as.integer(str_detect(crt3, "^47[^\\d]*$"))
)]

# numeracy = number of CRT items scored 1 (NA if any dummy is NA)
bovitz_w[, numeracy := crt1_dummy + crt2_dummy + crt3_dummy]

# Create political knowledge dummies and sum
# Each dummy is 1 when the respondent picked the answer coded as correct for
# that item, 0 for any other answer, and NA when the answer is missing.
# Fix: know_senate_dummy is now scored from know_senate. It was previously
# computed from know_house, which counted the House item twice and never
# scored the Senate item.
bovitz_w[, `:=`(
  know_house_dummy = as.integer(know_house == "Republicans"),
  know_senate_dummy = as.integer(know_senate == "Republicans"),
  know_vance_dummy = as.integer(know_vance == "Vice President"),
  know_roberts_dummy = as.integer(know_roberts == "Chief Justice of the US Supreme Court")
)]

# pol_awareness = number of the four items answered correctly
# (NA if any dummy is NA)
bovitz_w[, pol_awareness := know_house_dummy + know_senate_dummy +
           know_vance_dummy + know_roberts_dummy]

# Create mock vignette dummies, sum, and categorical version
# Each dummy is 1 when the answer equals the option coded as correct for that
# item, 0 for any other answer, and NA when the answer is missing.
bovitz_w[, `:=`(
  mock1_dummy = as.integer(mock1 == "Event licensing"),
  mock2_dummy = as.integer(mock2 == "A single license will cover all events occurring in a stadium"),
  mock3_dummy = as.integer(mock3 == "There may be a special hearing held by lawmakers")
)]

# mock_count = number of the three items answered correctly
bovitz_w[, mock_count := mock1_dummy + mock2_dummy + mock3_dummy]

# f_mock_count = None (0 correct) / Some (1 or 2) / All (3), with the factor
# levels moved so that "None" and "Some" come first
bovitz_w[, f_mock_count := fct_relevel(
  fcase(
    mock_count == 0, "None",
    mock_count %in% 1:2, "Some",
    mock_count == 3, "All"
  ),
  "None", "Some"
)]

# Pivot to long ----------------------------------------------------------------

# Extract subset of the data with only acquiescence questions

# Treatment columns: names matching "__js.*_d$", excluding any that contain
# "ballot". `negate = TRUE` is named explicitly rather than passed as a
# positional `T`, which is a reassignable variable.
vars_treatment <- names(bovitz_w) |>
  str_subset("__js.*_d$") |>
  str_subset("ballot", negate = TRUE)

# Response columns: the treatment names with the "__js_" marker and the
# trailing "_d" removed, so each response lines up with its treatment column.
vars_responses <- vars_treatment |>
  str_remove("__js_") |>
  str_remove("_d$") |>
  str_subset("ballot", negate = TRUE)

# Keep only the respondent id plus the treatment/response pairs, then give
# the pairs a common stem: treatment columns become "<q>_d" and response
# columns become "<q>_r".
vars <- c("RESPONDENT_ID", vars_treatment, vars_responses)
bovitz_ss <- copy(bovitz_w)[, ..vars]
setnames(bovitz_ss, vars_treatment, str_remove(vars_treatment, "__js_"))
setnames(bovitz_ss, vars_responses, str_c(vars_responses, "_r"))

# Pivot wide-to-long: make unit of observation the respondent-question

bovitz_long <- bovitz_ss |>
  # pivot: the "_d" columns are stacked into `treatment` and the "_r" columns
  # into `response`; melt's `variable` column is the index of each pair
  melt(
    id.vars = "RESPONDENT_ID",
    # keep treatment and response
    measure.vars = patterns("treatment" = "_d$", "response" = "_r$")
  ) |>
  # merge-in question labels: lookup table from pair index to question name.
  # seq_along() is used instead of 1:length() so an empty vars_responses
  # yields an empty lookup rather than the indices c(1, 0); the join key is
  # named explicitly and TRUE/FALSE replace the reassignable T/F.
  merge(
    data.table(q = vars_responses,
               variable = as.factor(seq_along(vars_responses))),
    by = "variable", all.x = TRUE, sort = FALSE
  )

# drop the pair index (the question name is now in q), then reorder columns
bovitz_long[, variable := NULL]
setcolorder(bovitz_long, c("RESPONDENT_ID", "q", "treatment", "response"))

# refactor variables
bovitz_long[, treatment := as.integer(treatment)]

# newdv scores the response relative to the treatment indicator:
#   1   = "Yes, true" under treatment 1, or "No false" under treatment 0
#   0   = "No false" under treatment 1, or "Yes, true" under treatment 0
#   0.5 = "Not sure"
#   NA  = anything else
# NOTE(review): "No false" has no comma while "Yes, true" does - confirm both
# strings match the survey's exact answer labels.
bovitz_long[, newdv := {
  says_true <- response %in% c("Yes, true")
  says_false <- response %in% c("No false")
  fcase(
    (treatment == 1 & says_true) | (treatment == 0 & says_false), 1,
    (treatment == 1 & says_false) | (treatment == 0 & says_true), 0,
    response == "Not sure", 0.5,
    default = NA_real_
  )
}]

# shorter id name for the long dataset
setnames(bovitz_long, old = "RESPONDENT_ID", new = "r_id")

# Add covariates to long dataset
# Left join on r_id (the only column the two tables share): every
# respondent-question row of bovitz_long is kept, in its existing order, and
# gains the respondent-level covariates computed on bovitz_w.
# TRUE/FALSE are spelled out because T and F are reassignable variables.
bovitz <- merge(
  bovitz_long,
  bovitz_w[, .(r_id = RESPONDENT_ID, sees_not_sure = `__js_sees_not_sure`,
               education, education_num, ed4,
               numeracy, crt1_dummy, crt2_dummy, crt3_dummy,
               pol_awareness, know_house_dummy, know_senate_dummy,
               know_vance_dummy, know_roberts_dummy,
               mock1_dummy, mock2_dummy, mock3_dummy, mock_count, f_mock_count)],
  by = "r_id", all.x = TRUE, sort = FALSE
)

# Save intermediate dataset
cat(sprintf("--Save proc/bovitz.rds (%s)\n", f_time_gmt()))
saveRDS(bovitz, here("proc", "bovitz.rds"))
